Library import

library(SpatialExperiment)
library(data.table)
library(scater) # it imports scuttle
library(cowplot)
library(ggplot2)
theme_set(theme_bw())
library(matrixStats)
library(dplyr)
library(tidyr)
library(tibble)
library(arrow)
library(scales)
library(sf)
library(tmap)

Read-in function

readCosmxSPE <- function(dirname = dirname, 
                         countmatfpattern = "exprMat_file.csv", 
                         metadatafpattern = "metadata_file.csv", 
                         coord_names = c("CenterX_global_px",
                                         "CenterY_global_px")){
  
  countmat_file <- file.path(dirname, list.files(dirname, countmatfpattern))
  metadata_file <- file.path(dirname, list.files(dirname, metadatafpattern))
  
  # Read in 
  countmat <- data.table::fread(countmat_file)
  metadata <- data.table::fread(metadata_file)
  
  # Count matrix   
  counts <- merge(countmat, metadata[, c("fov", "cell_ID")])
  counts <- subset(counts, select = -c(cell, fov, cell_ID))
  cell_codes <- rownames(counts)
  features <- colnames(counts)
  counts <- t(counts)
  
  rownames(counts) <- features
  colnames(counts) <- cell_codes
  
  # rowData (does not exist)
  
  # colData
  colData <- merge(metadata, countmat[, c("fov", "cell_ID")])
  
  spe <- SpatialExperiment::SpatialExperiment(
    assays = list(counts = counts),
    # rowData = rowData,
    colData = colData,
    spatialCoordsNames = coord_names
  )
  
  return(spe)
}

Setting directories

dirname <- "/NAS06/work/Spatial_omics/CosMx/Human cortex"
count_pattern <- "S3_exprMat_file.csv.gz"
meta_pattern <- "S3_metadata_file.csv.gz"
sample_name <- "CosMx_Human_Cortex"

CosMx dataset exploratory analysis for preprocessing

SpatialExperiment object creation and exploration

spe <- readCosmxSPE(dirname = dirname, countmatfpattern = count_pattern, metadatafpattern = meta_pattern)
spe <- scuttle::addPerCellQC(spe, subsets=list(NegProb = grep("^NegPrb", rownames(spe))))
names(colData(spe)@listData)
##  [1] "cell_ID"                                        
##  [2] "fov"                                            
##  [3] "cell"                                           
##  [4] "nCount_RNA"                                     
##  [5] "nFeature_RNA"                                   
##  [6] "nCount_negprobes"                               
##  [7] "nFeature_negprobes"                             
##  [8] "nCount_falsecode"                               
##  [9] "nFeature_falsecode"                             
## [10] "Area"                                           
## [11] "AspectRatio"                                    
## [12] "Width"                                          
## [13] "Height"                                         
## [14] "Mean.Histone"                                   
## [15] "Max.Histone"                                    
## [16] "Mean.G"                                         
## [17] "Max.G"                                          
## [18] "Mean.rRNA"                                      
## [19] "Max.rRNA"                                       
## [20] "Mean.GFAP"                                      
## [21] "Max.GFAP"                                       
## [22] "Mean.DAPI"                                      
## [23] "Max.DAPI"                                       
## [24] "cell_id"                                        
## [25] "assay_type"                                     
## [26] "dualfiles"                                      
## [27] "Run_name"                                       
## [28] "ISH.concentration"                              
## [29] "Dash"                                           
## [30] "tissue"                                         
## [31] "slide_ID"                                       
## [32] "Run_Tissue_name"                                
## [33] "Panel"                                          
## [34] "assay_type.1"                                   
## [35] "version"                                        
## [36] "Area.um2"                                       
## [37] "CenterX_local_px"                               
## [38] "CenterY_local_px"                               
## [39] "propNegative"                                   
## [40] "complexity"                                     
## [41] "errorCtEstimate"                                
## [42] "percOfDataFromError"                            
## [43] "qcFlagsRNACounts"                               
## [44] "qcFlagsCellCounts"                              
## [45] "qcFlagsCellPropNeg"                             
## [46] "qcFlagsCellComplex"                             
## [47] "qcFlagsCellArea"                                
## [48] "qcCellsFlagged"                                 
## [49] "median_negprobes"                               
## [50] "negprobes_quantile_0.9"                         
## [51] "median_RNA"                                     
## [52] "RNA_quantile_0.9"                               
## [53] "nCell"                                          
## [54] "nCount"                                         
## [55] "nCountPerCell"                                  
## [56] "nFeaturePerCell"                                
## [57] "propNegativeCellAvg"                            
## [58] "complexityCellAvg"                              
## [59] "errorCtPerCellEstimate"                         
## [60] "percOfDataFromErrorPerCell"                     
## [61] "qcFlagsFOV"                                     
## [62] "RefinedClusts_Final"                            
## [63] "prob"                                           
## [64] "RNA_spatialClusteringNeighbours_astro.1"        
## [65] "RNA_spatialClusteringNeighbours_astro.2"        
## [66] "RNA_spatialClusteringNeighbours_endothelial"    
## [67] "RNA_spatialClusteringNeighbours_Inh.1"          
## [68] "RNA_spatialClusteringNeighbours_Inh.2"          
## [69] "RNA_spatialClusteringNeighbours_Inh.3"          
## [70] "RNA_spatialClusteringNeighbours_L2_3"           
## [71] "RNA_spatialClusteringNeighbours_L4"             
## [72] "RNA_spatialClusteringNeighbours_L6"             
## [73] "RNA_spatialClusteringNeighbours_microglia.1"    
## [74] "RNA_spatialClusteringNeighbours_microglia.2"    
## [75] "RNA_spatialClusteringNeighbours_oligodendrocyte"
## [76] "RNA_spatialClusteringNeighbours_OPC"            
## [77] "RNA_spatialClusteringNeighbours_unknown"        
## [78] "spatialClusteringAssignments"                   
## [79] "sample_id"                                      
## [80] "sum"                                            
## [81] "detected"                                       
## [82] "subsets_NegProb_sum"                            
## [83] "subsets_NegProb_detected"                       
## [84] "subsets_NegProb_percent"                        
## [85] "total"
dim(spe)
## [1]   6622 188686

Selecting only numeric and integer variables from metadata then only variables of interest from the list

temp <- unlist(lapply(spe@colData@listData, function(x) is.numeric(x) | is.integer(x)))
num_variables <- c()
for (i in 1:length(temp)){
  num_variables[i] <- ifelse(temp[i] == TRUE, names(temp[i]), NA)
  num_variables <- num_variables[!is.na(num_variables)]
}
num_variables
##  [1] "cell_ID"                                        
##  [2] "fov"                                            
##  [3] "nCount_RNA"                                     
##  [4] "nFeature_RNA"                                   
##  [5] "nCount_negprobes"                               
##  [6] "nFeature_negprobes"                             
##  [7] "nCount_falsecode"                               
##  [8] "nFeature_falsecode"                             
##  [9] "Area"                                           
## [10] "AspectRatio"                                    
## [11] "Width"                                          
## [12] "Height"                                         
## [13] "Mean.Histone"                                   
## [14] "Max.Histone"                                    
## [15] "Mean.G"                                         
## [16] "Max.G"                                          
## [17] "Mean.rRNA"                                      
## [18] "Max.rRNA"                                       
## [19] "Mean.GFAP"                                      
## [20] "Max.GFAP"                                       
## [21] "Mean.DAPI"                                      
## [22] "Max.DAPI"                                       
## [23] "slide_ID"                                       
## [24] "Area.um2"                                       
## [25] "CenterX_local_px"                               
## [26] "CenterY_local_px"                               
## [27] "propNegative"                                   
## [28] "complexity"                                     
## [29] "errorCtEstimate"                                
## [30] "percOfDataFromError"                            
## [31] "median_negprobes"                               
## [32] "negprobes_quantile_0.9"                         
## [33] "median_RNA"                                     
## [34] "RNA_quantile_0.9"                               
## [35] "nCell"                                          
## [36] "nCount"                                         
## [37] "nCountPerCell"                                  
## [38] "nFeaturePerCell"                                
## [39] "propNegativeCellAvg"                            
## [40] "complexityCellAvg"                              
## [41] "errorCtPerCellEstimate"                         
## [42] "percOfDataFromErrorPerCell"                     
## [43] "prob"                                           
## [44] "RNA_spatialClusteringNeighbours_astro.1"        
## [45] "RNA_spatialClusteringNeighbours_astro.2"        
## [46] "RNA_spatialClusteringNeighbours_endothelial"    
## [47] "RNA_spatialClusteringNeighbours_Inh.1"          
## [48] "RNA_spatialClusteringNeighbours_Inh.2"          
## [49] "RNA_spatialClusteringNeighbours_Inh.3"          
## [50] "RNA_spatialClusteringNeighbours_L2_3"           
## [51] "RNA_spatialClusteringNeighbours_L4"             
## [52] "RNA_spatialClusteringNeighbours_L6"             
## [53] "RNA_spatialClusteringNeighbours_microglia.1"    
## [54] "RNA_spatialClusteringNeighbours_microglia.2"    
## [55] "RNA_spatialClusteringNeighbours_oligodendrocyte"
## [56] "RNA_spatialClusteringNeighbours_OPC"            
## [57] "RNA_spatialClusteringNeighbours_unknown"        
## [58] "sum"                                            
## [59] "detected"                                       
## [60] "subsets_NegProb_sum"                            
## [61] "subsets_NegProb_detected"                       
## [62] "subsets_NegProb_percent"                        
## [63] "total"
variables <- c("total", "detected")

Large-scale to small-scale

Approaching dataset preprocessing starting from global-scale, proceeding to local-scale, then fov-focused exploratory analysis

Sample map with fov grid

fov_pattern <- "S3_fov_positions_file.csv.gz"
metadata_file <- file.path(dirname, list.files(dirname, meta_pattern))
fovpos_file <- file.path(dirname, list.files(dirname, fov_pattern))

metadata <- data.table::fread(metadata_file)
fov_positions <- data.table::fread(fovpos_file, header = T)
fov_positions <- fov_positions[fov_positions$FOV%in%unique(metadata$fov),]
fov_positions <- fov_positions |> mutate(CenterX_global_px = X_mm*25000/3, CenterY_global_px = Y_mm*25000/3)
center_x <- max(fov_positions$CenterX_global_px)/2
center_y <- max(fov_positions$CenterY_global_px)/2
fov_positions <- fov_positions |> mutate(CenterX_global_px = -(CenterX_global_px - center_x) + center_x, CenterY_global_px = -(CenterY_global_px - center_y) + center_y)

# image theme
my_image_theme <- function(back.color="black",back.border=NA,title.col="white") {
  theme(panel.border = element_blank(),
        legend.key = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        panel.grid = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        panel.background = element_rect(fill = "transparent",colour = NA),
        plot.title = element_text(color = title.col,hjust = 0.5,face = "bold"),
        plot.background = element_rect(fill = "transparent",colour = back.border))
}

fov_xdim <- 4000
fov_ydim <- 4000

ggplot() + 
  geom_point(data = metadata, mapping = aes(x = CenterX_global_px, y = CenterY_global_px), colour = "darkmagenta", fill="darkmagenta", size = 0.05, alpha = 0.2) +
  annotate("rect", xmin = fov_positions$CenterX_global_px, 
           xmax = fov_positions$CenterX_global_px + fov_xdim, 
           ymin = fov_positions$CenterY_global_px, 
           ymax = fov_positions$CenterY_global_px - fov_ydim,
           alpha = .2, color = "black",linewidth = 0.2) +
  geom_text(aes(x = fov_positions$CenterX_global_px + fov_xdim/2,
                y = fov_positions$CenterY_global_px + fov_ydim/2 - fov_ydim,
                label = fov_positions$FOV), color="black", fontface = "bold") +
  ggtitle(sample_name) + 
  my_image_theme(back.color = "white", back.border = "white", title.col = "black")

Numeric/integer variable dependence on global position

Importing global coordinates from spatialCoords slot since they are not available in colData

colData(spe)$CenterX_global_px <- spatialCoords(spe)[,1]
colData(spe)$CenterY_global_px <- spatialCoords(spe)[,2]

Global coordinate scatterplot colored by variables showing spatial pattern

Global coordinate scatterplot colored by variables showing spatial pattern

spe$CenterX_global_um <- spe$CenterX_global_px*0.18
spe$CenterY_global_um <- spe$CenterY_global_px*0.18  
spe$logtot <- log10(spe$total)

plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "total", point_size = 0.1) + ggtitle("Total counts per cell")

plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "logtot", point_size = 0.1) + ggtitle("Log-total counts per cell")

plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "detected", point_size = 0.1) + ggtitle("Detected features per cell")

spe$um_area <- spe$Area*0.0324
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "um_area", point_size = 0.1) + ggtitle("Area in μm per cell")

spe$log_um_area <- log10(spe$um_area)
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "log_um_area", point_size = 0.1) + ggtitle("Log_um_area per cell")

plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "AspectRatio", point_size = 0.1) + ggtitle("Width/Height ratio per cell")

spe$target_counts <- spe$total - spe$subsets_NegProb_sum
spe$target_detected <- spe$detected - spe$subsets_NegProb_detected
spe$tot_det_ratio <- spe$target_counts/spe$target_detected
plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "tot_det_ratio", point_size = 0.1) + ggtitle("Target counts/detected features ratio per cell")

plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "subsets_NegProb_sum", point_size = 0.1) + ggtitle("Negative control probe counts per cell")

Check dependence on fov

Global coordinate scatterplot colored by binary color code: in black are the cells below the set variable threshold, in grey all the cells above

threshold <- 0.10
spe$var_color <- as.factor(ifelse(colData(spe)$total < quantile(colData(spe)$total, probs = threshold), "black", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Counts below 10% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.05
spe$var_color <- as.factor(ifelse(colData(spe)$total < quantile(colData(spe)$total, probs = threshold), "black", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Counts below 5% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.10
spe$var_color <- as.factor(ifelse(colData(spe)$detected < quantile(colData(spe)$detected, probs = threshold), "black", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Detected features below 10% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.05
spe$var_color <- as.factor(ifelse(colData(spe)$detected < quantile(colData(spe)$detected, probs = threshold), "black", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Detected features below 5% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

spe$um_area <- spe$Area*0.0324
threshold <- 0.10
spe$var_color <- as.factor(ifelse(colData(spe)$um_area < quantile(colData(spe)$um_area, probs = threshold), "black", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Area in μm below 10%") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.05
spe$var_color <- as.factor(ifelse(colData(spe)$um_area < quantile(colData(spe)$um_area, probs = threshold), "black", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Area in μm below 5%")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.90
spe$var_color <- as.factor(ifelse(colData(spe)$um_area > quantile(colData(spe)$um_area, probs = threshold), "red", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Area in μm above 90% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.95
spe$var_color <- as.factor(ifelse(colData(spe)$um_area > quantile(colData(spe)$um_area, probs = threshold), "red", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Area in μm above 95% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.10
spe$var_color <- as.factor(ifelse(colData(spe)$AspectRatio < quantile(colData(spe)$AspectRatio, probs = threshold), "black", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Width/height below 10% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.05
spe$var_color <- as.factor(ifelse(colData(spe)$AspectRatio < quantile(colData(spe)$AspectRatio, probs = threshold), "black", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Width/height below 5% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.90
spe$var_color <- as.factor(ifelse(colData(spe)$AspectRatio > quantile(colData(spe)$AspectRatio, probs = threshold), "red", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Width/height above 90% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.95
spe$var_color <- as.factor(ifelse(colData(spe)$AspectRatio > quantile(colData(spe)$AspectRatio, probs = threshold), "red", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Width/height above 95% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.10
spe$var_color <- as.factor(ifelse(colData(spe)$tot_det_ratio < quantile(colData(spe)$tot_det_ratio, probs = threshold, na.rm = T), "black", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Target counts/features below 10% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.05
spe$var_color <- as.factor(ifelse(colData(spe)$tot_det_ratio < quantile(colData(spe)$tot_det_ratio, probs = threshold, na.rm = T), "black", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(black = "black",lightgrey = "lightgrey")) + ggtitle("Target counts/features below 5% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

threshold <- 0.50
spe$var_color <- as.factor(ifelse(colData(spe)$subsets_NegProb_sum > quantile(colData(spe)$subsets_NegProb_sum, probs = threshold, na.rm = T), "red", "lightgrey"))
p1 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Negative probe counts above 50% threshold") + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
threshold <- 0.75
spe$var_color <- as.factor(ifelse(colData(spe)$subsets_NegProb_sum > quantile(colData(spe)$subsets_NegProb_sum, probs = threshold, na.rm = T), "red", "lightgrey"))
p2 <- plotColData(spe, "CenterY_global_um", "CenterX_global_um", colour_by = "var_color", 
            point_size = 0.1) + scale_color_manual(values = c(red = "red",lightgrey = "lightgrey")) + ggtitle("Negative probe counts above 75% threshold")  + theme(legend.position="none")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plot_grid(p1, p2, ncol = 2)

Heatmap with fovs colored by mean/median values of variable of interest

meta_df <- data.frame(colData(spe))
fov_cellcounts <- as.integer(table(colData(spe)$fov))
meta_df <- group_by(meta_df, fov) |> summarize(mean_total = mean(total), mean_detected = mean(detected), mean_areaum = mean(um_area), mean_aspectratio = mean(AspectRatio), mean_tot_det_ratio = mean(tot_det_ratio), mean_subsets_NegProb_sum = mean(subsets_NegProb_sum), median_total = median(total), median_detected = median(detected), median_areaum = median(um_area), median_aspectratio = median(AspectRatio), median_tot_det_ratio = median(tot_det_ratio), median_subsets_NegProb_sum = median(subsets_NegProb_sum))

meta_df <- cbind(meta_df, fov_cellcounts)

fov_positions <- fov_positions |> mutate(fov = FOV, FOV = NULL)
fov_positions <- left_join(fov_positions, meta_df, by = join_by(fov))

fov_size <- 4000
poly_df <- transmute(fov_positions, x_1 = CenterX_global_px, y_1 = CenterY_global_px, 
                     x_2 = CenterX_global_px + fov_size, y_2 = CenterY_global_px, 
                     x_3 = CenterX_global_px + fov_size, y_3 = CenterY_global_px + fov_size, 
                     x_4 = CenterX_global_px, y_4 = CenterY_global_px + fov_size)

str(poly_df)
## Classes 'data.table' and 'data.frame':   392 obs. of  8 variables:
##  $ x_1: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ y_1: num  93851 89585 85319 81053 76787 ...
##  $ x_2: num  4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 ...
##  $ y_2: num  93851 89585 85319 81053 76787 ...
##  $ x_3: num  4000 4000 4000 4000 4000 4000 4000 4000 4000 4000 ...
##  $ y_3: num  97851 93585 89319 85053 80787 ...
##  $ x_4: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ y_4: num  97851 93585 89319 85053 80787 ...
##  - attr(*, ".internal.selfref")=<externalptr>
ls_list <- list()
for (k in 1:dim(fov_positions)[1]){
  ls_list[[k]] <- st_linestring(matrix(as.numeric(poly_df[k]), ncol = 2, byrow = T))
}

sf_data <- st_as_sf(fov_positions, coords = c("CenterX_global_px", "CenterY_global_px"), geometry = st_sfc(ls_list))

sf_data <- st_cast(sf_data, "POLYGON")

# plot(sf_data)
names(sf_data)
##  [1] "Slide"                      "X_mm"                      
##  [3] "Y_mm"                       "fov"                       
##  [5] "mean_total"                 "mean_detected"             
##  [7] "mean_areaum"                "mean_aspectratio"          
##  [9] "mean_tot_det_ratio"         "mean_subsets_NegProb_sum"  
## [11] "median_total"               "median_detected"           
## [13] "median_areaum"              "median_aspectratio"        
## [15] "median_tot_det_ratio"       "median_subsets_NegProb_sum"
## [17] "fov_cellcounts"             "geometry"
sf_data$fov <- as.factor(sf_data$fov)
heatmap_variables <- setdiff(names(sf_data), c("Slide", "X_mm", "Y_mm", "fov", "fov_cellcounts", "geometry"))
for (var in 1:length(heatmap_variables)){
  heat_var <- heatmap_variables[var]
  p <- ggplot() +
  geom_sf(data = sf_data, aes(fill = .data[[heat_var]])) + 
  scale_fill_viridis_c() +
  theme(axis.title.x = element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(), 
        axis.title.y = element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid = element_blank()) +
  geom_text(data = fov_positions, aes(x = fov_positions$CenterX_global_px+fov_size/2,
                y = fov_positions$CenterY_global_px+fov_size/2,
                label = fov_positions$fov), color="white", size = 2) +
    ggtitle(paste0(heat_var))
  print(p)
}  

Boxplot of each of the numeric/integer variable distribution by fov, useful to see spatial patterns in data

variables <- c(variables, "logtot", "um_area", "log_um_area", "AspectRatio", "tot_det_ratio", "subsets_NegProb_sum")

for (var in 1:length(variables)){
  n <- grep(paste0("^", variables[var], "$"), colnames(spe@colData))
  boxplot(spe@colData@listData[[n]] ~ spe$fov, ylab = colnames(spe@colData)[n], cex = 0.2)
  title(paste0(colnames(spe@colData)[n], "~fov"))
}

Per fov cell counts

fov_df <- data.frame("n_cells" = as.integer(table(spe@colData@listData$fov)), "fov" = as.factor(names(table(spe@colData@listData$fov))))
levels(fov_df$fov) <- order(levels(fov_df$fov), seq(1:length(fov_df$fov)))

ggplot(data = fov_df, aes(x = fov, y = n_cells)) +
  geom_col(fill = "#B0C4DE") + theme(legend.position ="none") +
  ggtitle("Number of cells per fov") 

Total/Area scatterplot

cell_df <- as.data.frame(spe@colData@listData)

ggplot(data = cell_df, aes(x = um_area, y = total)) +
  geom_point() + 
  ggtitle("Counts ~ μm area")

Detected/Area scatterplot

ggplot(data = cell_df, aes(x = um_area, y = detected)) +
  geom_point() + 
  ggtitle("Detected features ~ μm area")

AspectRatio/Area scatterplot

ggplot(data = cell_df, aes(x = um_area, y = AspectRatio)) +
  geom_point() + 
  ggtitle("AspectRatio ~ μm area")

Total/Area scatterplot faceted by fov

cell_df_red <- subset(cell_df, subset = fov >=1 & fov <= 50)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=51 & fov <= 100)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=101 & fov <= 150)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=151 & fov <= 200)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=201 & fov <= 250)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=251 & fov <= 300)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=301 & fov <= 350)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=351 & fov <= 392)

ggplot(data = cell_df_red, aes(x = um_area, y = total)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Total counts ~ μm area by fov")

Detected/Area scatterplot faceted by fov

cell_df_red <- subset(cell_df, subset = fov >=1 & fov <= 50)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=51 & fov <= 100)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=101 & fov <= 150)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=151 & fov <= 200)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=201 & fov <= 250)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=251 & fov <= 300)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=301 & fov <= 350)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=351 & fov <= 392)

ggplot(data = cell_df_red, aes(x = um_area, y = detected)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("Detected features ~ μm area by fov")

AspectRatio/Area scatterplot faceted by fov

cell_df_red <- subset(cell_df, subset = fov >=1 & fov <= 50)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=51 & fov <= 100)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=101 & fov <= 150)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=151 & fov <= 200)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=201 & fov <= 250)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=251 & fov <= 300)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=301 & fov <=350)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")

cell_df_red <- subset(cell_df, subset = fov >=351 & fov <= 392)

ggplot(data = cell_df_red, aes(x = um_area, y = AspectRatio)) +
  geom_point(size = 0.1) +
  facet_wrap(~fov) + 
  ggtitle("AspectRatio ~ μm area by fov")